###################
## Nested Pseudo likelihood Estimation Heterogeneous ***WITH*** fe at parish level
## To be run after Mlogit_NAPPHeterov4.R
## Where we guarantee that KS probabilities are within [0,1]
## We guarantee inner loop (fixed point expectations) convergence using tol 10e-10 for the sup-norm stopping criterion
## We follow KS 2012 and KS 2018 using outer loope iterations k=50 and guaranteeing inner loop convergence (see above)
##DEPENDING ON THE EXERCISE: BENCHMARK OR ROBUSTNESS, CHANGE THE NAME OF ALL DATASETS, that we either save or load below, TO ONE OF THE FOLLOWING:
## w50list atleast1 w100list nmovers age1530 w50list atmost10 h1 h2.
## Therefore, if one is running the benchmark specifiction, line should be:
## load(file=paste(root,"NAPP/ReStat/pooleddataNAPPneww50list.RData",sep="" ))
###################

rm(list=ls(all=TRUE))
set.seed(12345)
#root <- "S:/NetworkParish/HPC/"
root <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Submission/Final_ReStat/Codes/" #root <- "C:/HPC/"
#root <- "C:/HPC/"
setwd(paste(root,"Results/ReStat/",sep=""))
#codes <- "C:/HPC/CODE/"
codes <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Submission/Final_ReStat/Codes/C_ANALYSIS/" #codes <- "C:/HPC/CODE/"
source(paste(codes,"Mlogit_NAPPHeteroFunctionsv2.R",sep=""))
AM <- 0 #if using Aguirregabiria and Mira==1, 0 if using Kasahara and Shimotsu
if( AM==1 ){  rootsave <- paste(root,"Results/ReStat/",sep="") } else   {rootsave <- paste(root,"Results/ReStat/KS/",sep="")}
sink(paste(rootsave,"NAPPEstimationHetewfenew.txt")) 
options(max.print=1000000)
gc()
#######
#Load Data

load(file=paste(root,"NAPP/ReStat/pooleddataNAPPnew.RData",sep="" )) # pooleddata, Data per parish in long format (sw) pooled together
load(file=paste(root,"NAPP/ReStat/pooleddataNAPPhomonew.RData",sep="" )) # pooleddatahomo, Data per parish in long format (sw) pooled together for hmogenous
load(file=paste(root,"NAPP/ReStat/pooleddatalistNAPPnew.RData",sep="" )) #pooleddatalist, Data per parish in wide format (sw.1,sw.2,...) pooled together
load(file=paste(root,"NAPP/ReStat/pooleddatalistNAPPhomonew.RData",sep="" )) #pooleddatalisthomo, Data per parish in wide format (sw.1,sw.2,...) pooled together
load(file=paste(root,"NAPP/ReStat/datalistNAPPnew.RData",sep="" )) #datalist, Lists of data per parish in wide format (sw.1,sw.2,...)
load(file=paste(root,"NAPP/ReStat/coordslistNAPPnew.RData",sep="" )) #coordslist, Lists of coordinates per parish
load(file=paste(root,"NAPP/ReStat/dneighboursNAPPnew.RData",sep="")) #dneighbours, Lists of neigbours identities per parish
load(file=paste(root,"NAPP/ReStat/wlistNAPPnew.RData",sep="")) #wlist, Sparse Lists of neighbours per parish


#define which variables we include in the estimation and which formula we use
xvars <- c("AGE","married",  "nchild",   "nservant", "stayerp") #, "SEX", "stayerc"
wvar <- "sw"
fformula <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDCivil)"),"|", wvar ,sep="")))          
fformula1 <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDSocial)"),"|", wvar ,sep="")))          
fformula <- fformula1
namesmlexer <- c("Mlmwnew.RData","Mlmtwnew.RData")
#sum(!is.na(fformulas))

#Outer Iteration: Maximum likelihood
alter <- sort(unique(pooleddata$alt))
L <- length(alter)
it0 <- 1 
maxiter0 <- 50
zero0 <- 1E-8
Jeit <- matrix(rep(0,maxiter0*L),nrow=maxiter0) #Keep track of the coefficients
ndJeit <- L

#To keep original pooleddata to implement Weidner (2012) method
pooleddata0 <- pooleddata
datalist0 <- datalist
pooleddata <- tolong(pooleddatalist,"choice",c("sw.")) # to convert datalist class "data.table" into "mlogit.data"
pooleddatalist0 <- pooleddatalist
coordslist0 <- coordslist
wlist0 <- wlist
#wklist0 <- wklist

#defining weight
weigh <- wlist

#To keep original pooleddata to implement Weidner (2012) method
pooleddata00 <- pooleddata0

#to define outer iteration of expectations
ndexpectito <- L
ndexpectitoall <- matrix(0,nrow=maxiter0) #Keep track of the tolerance level
expeco <- subset(pooleddatalist,select=paste(wvar,".",alter,sep=""))

while (it0<=maxiter0 & ndexpectito > zero0) {
  print(paste("ML iteration: ",it0, sep=""))
  
  #Estimating
  if (it0==1) {
    load("Mlmtwnew.RData") 
    ml.m.w <- ml.mt.w   
    ml.m.w0 <- ml.m.w
  }  else  ml.m.w <- mnlogit(fformula, pooleddata, "alt")
  
  #Define starting conditions, if we want to give them staring values based on theta
  #if (it0==1) ml.m.w$coeff[paste(wvar,":",alter,sep="")] <- runif(L,2,4) 
  
  #Keeping coeff  
  Jeit[it0,] <- summary(ml.m.w)$coeff[paste(wvar,":",alter,sep="")] 
  print(Jeit[it0,])
  ndexpectitoall[it0,] <- ndexpectito 
  print(ndexpectitoall[it0,])
  
  #Inner iteration, fixed point in beliefs
  it <- 1 
  zero <- 1E-8
  #maxiter <- 1 #if iterating a la Aguirregabiria and Mira (2007) or a la Weidner (2012)
  maxiter <- 100 #If iterating with the fixed point a la Lee Lin Liu (2014)
  expectit <- matrix(rep(0,maxiter*L),nrow=maxiter)
  ndexpectit <- L
  
  #subsetting sample to 
  expec <- subset(pooleddatalist,select=paste(wvar,".",alter,sep=""))
  #iteration of beliefs
  while (it<=maxiter & ndexpectit > zero) {
    print(paste("Inner Iteration ",it,sep=""))    
    #keepint track of expectations
    expectit[it,] <- apply(expec,2,mean)
    
    
    alpha <- .1 #Kasahara and Shimotsu (2012)
    #Predict probabilities on the pooled data list
    tempprob <- predict(ml.m.w,pooleddata,probability=TRUE)
    for(aa in sort(alter, decreasing=TRUE) ){
      ap <- which(alter==aa)
      if (aa != min(alter)) { #to guarantee KS probabilities are within [0,1] bounds
        if (AM==1) { 
          pooleddatalist[,paste(wvar,".",aa,sep="")] <- ml.m.w$probabilities[,ap] #Aguirregabiria and Mira (2007)
        } else {
          pooleddatalist[,paste(wvar,".",aa,sep="")] <- ((tempprob[,ap])^alpha)*((ml.m.w0$probabilities[,ap])^(1-alpha)) #Kasahara and Shimotsu (2012)
        }
      } else {
        if (AM==1) { 
          pooleddatalist[,paste(wvar,".",aa,sep="")] <- 1-apply(ml.m.w$probabilities[,paste(alter[alter!=min(alter)],sep="")],1,sum) 
        } else {
          pooleddatalist[,paste(wvar,".",aa,sep="")] <- 1-apply(((tempprob[,paste(alter[alter!=min(alter)],sep="")])^alpha)*((ml.m.w0$probabilities[,paste(alter[alter!=min(alter)],sep="")])^(1-alpha)),1,sum) #Kasahara and Shimotsu (2012)
        }
      }
    }
    
    #splitting the previous data into a list 
    datalist  <- split(pooleddatalist, list(pooleddatalist$IDSocial))
    
    #spatially weighting
    provweigh <- vector(length=L,mode="list")
    for(aa in alter){
      ap <- which(alter==aa)
      provweigh[[ap]] <- mapply(weightinga,datalist,weigh)
    }
    
    #updating pooled datalist
    pooleddatalist <- rbindlist(datalist)
    for(aa in alter){
      ap <- which(alter==aa)
      pooleddatalist[[paste(wvar,".",aa,sep="")]] <- unlist(provweigh[[ap]])
    }
    rm(provweigh)
    attr(pooleddatalist,"class") <- "data.frame"
    #updating expectations
    expec1 <- subset(pooleddatalist,select=paste(wvar,".",alter,sep=""))
    
    #Updating pooleddata (long format)
    for(aa in alter){
      pooleddata[pooleddata$alt==aa,][[paste(wvar,sep="")]] <- pooleddatalist[[paste(wvar,".",aa,sep="")]] 
    }
    
    if (it>=2) ndexpectit <- max(as.matrix(abs(expec1-expec))) # the maximum of all matrix entries ndexpectit <- sum(distr(expec,expec1)) #element by element
    print(paste("Max abs diff inner: ",ndexpectit,sep=""))
    expec <- expec1
    it <- it+1
  }
  if (it<=maxiter & ndexpectit < zero) print(paste("Fixed point in beliefs convergence reached at iter: ",it-1,sep="")) else print(paste("Fixed point in beliefs one step update it: ",it,sep="")) #print(paste("Fixed point in beliefs FAILED to converge after it: ",it,sep=""))
  
  #updating expectations
  expec1o <- subset(pooleddatalist,select=paste(wvar,".",alter,sep=""))
  
  if (it0>=2) ndexpectito <- max(as.matrix(abs(expec1o-expeco))) # the maximum of all matrix entries ndJeit <- sum(abs(Jeit[it0,]-Jeit[it0-1,]))
  print(paste("Diff Abs max ",ndexpectito,sep=""))
  expeco <- expec1o
  
  #To keep the previous probabilities to perform Kasahara and Shimotsu
  ml.m.w0 <- ml.m.w
  it0 <- it0+1
}
print(paste("Outer ML Heterogenous iter converged at iter: ",it0-1," using weights: ",wvar,sep=""))
Jeit[it0-1,] <- summary(ml.m.w)$coeff[paste(wvar,":",alter,sep="")]
print(Jeit[it0-1,])
ndexpectitoall[it0-1,] <- ndexpectito 
print(ndexpectitoall[it0-1,])

#Saving last iteration results
save(ml.m.w,file=paste(rootsave,"PMLmtwfinalnew.RData",sep=""))
print(summary(ml.m.w))

#Plot convergence in coefficientes 
#coefficients
colnames(Jeit)<-c(paste("J.",alter,sep=""))
dataJeit<-as.data.frame(Jeit[c(1:(it0-1)),])
dataJeit$iter <- c(1:nrow(dataJeit))
meltdataJeit<-melt(dataJeit,id="iter")
cairo_ps(paste(rootsave,"JePMLmtwfinalnew.eps",sep=""), width=7, height=7) #to keep transparency from the intervals
print(ggplot(meltdataJeit,aes(x=iter,y=value,colour=variable,group=variable)) + geom_line()+ ylab(expression(paste(J[y]^{t})))+xlab("t"))
dev.off()
save(dataJeit,file=paste(rootsave,"JePMLmtwfinalnew.RData",sep=""))

#Plot convergence in Infinity Norm
#coefficients
colnames(ndexpectitoall)<-"AbsMax"
dataInftyNorm<-as.data.frame(ndexpectitoall[c(3:(it0-1))])
colnames(dataInftyNorm)<-"AbsMax"
dataInftyNorm$iter <- c(1:nrow(dataInftyNorm))
meltdataInftyNorm<-melt(dataInftyNorm,id="iter")
cairo_ps(paste(rootsave,"InftyNormMLmtwfinalnew.eps",sep=""), width=7, height=7) #to keep transparency from the intervals
print(ggplot(meltdataInftyNorm,aes(x=iter,y=value,colour=variable,group=variable)) + geom_line()+ ylab(expression(InftyNorm))+xlab("t"))
dev.off()
save(dataInftyNorm,file=paste(rootsave,"InftyNormMLmtwfinalnew.RData",sep=""))


####################################################
## Testing IIA ##
####################################################
print("Test IIA")
xvars <- c("AGE","married",  "nchild",   "nservant", "stayerp") #, "SEX", "stayerc"
wvar <- "sw"
fformula <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDCivil)"),"|", wvar ,sep="")))          
fformula1 <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDSocial)"),"|", wvar ,sep="")))          
fformula <- fformula1
load(file=paste(rootsave,"PMLmtwfinalnew.RData",sep=""))
pooleddata <- ml.m.w$model
ml.m.w0 <- mnlogit(fformula, pooleddata, "alt", reflevel="0")
ml.m.w0wo1 <- mnlogit(fformula, pooleddata, "alt", alt.subset=c("0","2","3","4","5"), reflevel="0")
ml.m.w0wo2 <- mnlogit(fformula, pooleddata, "alt", alt.subset=c("0","1","3","4","5"), reflevel="0")
ml.m.w0wo3 <- mnlogit(fformula, pooleddata, "alt", alt.subset=c("0","1","2","4","5"), reflevel="0")
ml.m.w0wo4 <- mnlogit(fformula, pooleddata, "alt", alt.subset=c("0","1","2","3","5"), reflevel="0")
ml.m.w0wo5 <- mnlogit(fformula, pooleddata, "alt", alt.subset=c("0","1","2","3","4"), reflevel="0")

print("IIA test removing occ==1")
hmftestjg(ml.m.w0,ml.m.w0wo1)
print("IIA test removing occ==2")
hmftestjg(ml.m.w0,ml.m.w0wo2)
print("IIA test removing occ==3")
hmftestjg(ml.m.w0,ml.m.w0wo3)
print("IIA test removing occ==4")
hmftestjg(ml.m.w0,ml.m.w0wo4)
print("IIA test removing occ==5")
hmftestjg(ml.m.w0,ml.m.w0wo5)


####################################################
## Interaction: non-movers#
####################################################

#### Adding interaction term with nmovers to the last iteration estimation 
print("Interaction speficication for non movers")
pooleddatanmovers <- ml.m.w$model
fformula2 <- mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDSocial)"),"|", wvar, "+","stayerp",":",wvar,sep="")))
ml.m.w <- mnlogit(fformula2, pooleddatanmovers, "alt")
print(summary(ml.m.w))
save(ml.m.w,file=paste(rootsave,"PMLmtwfinalnewinteractnmovers.RData",sep=""))

### Restricting sample to only nonmovers 
#print("Restricting sample to non movers")
#pooleddatanmovers <- pooleddatanmovers <- ml.m.w$model[ml.m.w$model$stayerp==1,]
#fformula2 <- mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(setdiff(xvars,"stayerp"), collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDSocial)"),"|", wvar ,sep="")))
#ml.m.w <- mnlogit(fformula2, pooleddatanmovers, "alt")
#print(summary(ml.m.w))
#save(ml.m.w,file=paste(rootsave,"PMLmtwfinalnewrestrictnmovers.RData",sep=""))
sink()
